#!/usr/bin/env Rscript

# Load the external and internal CSV files. Each is expected to carry a
# 'question' column (the join key) and a 'response' column (the paired value).
external <- read.csv("./external.csv", stringsAsFactors = FALSE)
internal <- read.csv("./internal.csv", stringsAsFactors = FALSE)

# Normalize the 'question' keys (lower-case, trimmed) so that differences in
# letter case or surrounding whitespace do not prevent a match.
external$question <- trimws(tolower(external$question))
internal$question <- trimws(tolower(internal$question))

# Inner join on 'question': only questions present in both files are kept.
# Columns that exist in both files (e.g. 'response') receive the suffixes.
merged_data <- merge(
  external, internal,
  by = "question", suffixes = c("_external", "_internal")
)

# merge() combines every matching row of one file with every matching row of
# the other, so a question repeated in either file yields several rows here
# and inflates the paired sample. Surface that instead of staying silent.
if (anyDuplicated(merged_data$question) > 0) {
  warning(
    "Duplicate 'question' values produced multiple pairings; ",
    "the test treats each pairing as a separate observation.",
    call. = FALSE
  )
}

# Coerce the responses to numeric; entries that cannot be parsed become NA
# (as.numeric() emits a coercion warning when that happens).
merged_data$response_external <- as.numeric(merged_data$response_external)
merged_data$response_internal <- as.numeric(merged_data$response_internal)

# Keep only pairs where both responses are present. wilcox.test() discards
# incomplete pairs itself and stops with an error when none remain, so the
# guard below has to count usable pairs rather than merged rows.
has_both <- !is.na(merged_data$response_external) &
  !is.na(merged_data$response_internal)
complete_pairs <- merged_data[has_both, , drop = FALSE]

# Run the Wilcoxon signed-rank test on every complete pair (no other filter).
if (nrow(complete_pairs) > 0) {
  wilcoxon_test <- wilcox.test(
    complete_pairs$response_external,
    complete_pairs$response_internal,
    paired = TRUE
  )
  # Scientific notation with three digits after the decimal point (i.e. four
  # significant digits). The p-value is NaN when all paired differences are 0.
  formatted_p_value <- formatC(wilcoxon_test$p.value, format = "e", digits = 3)
  cat("Wilcoxon signed-rank test p-value:", formatted_p_value, "\n")
} else {
  cat("No data available.\n")
}
